import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np
import plotly.io as pio
# Use the 'notebook' renderer as the default for every Plotly figure shown below.
pio.renderers.default ='notebook'
Lionel Andrés Messi started his career in 2003 and has scored a total of 701 goals playing for different clubs, not counting the Argentinian national team. The following analysis explores the dataset and presents that information through various charts and graphs.
# Load the goal log and prepare it for counting.
df = pd.read_csv('messi.csv')
df = df.assign(
    Date=pd.to_datetime(df['Date']),  # parse the textual dates once, up front
    Goles=1,  # constant marker so each row contributes 1 when summed/counted
)
# Per-tournament count of non-null values for every remaining column.
uno = df.groupby(by=['Tournament']).count()
# Pie chart: share of goals per tournament, drawn with the cyclical "Phase" palette.
colors = px.colors.cyclical.Phase
fig = px.pie(
    df,
    names='Tournament',
    values='Goles',
    color_discrete_sequence=colors,
    width=1020,
    height=800,
)
# Show the raw value on each slice; label and percentage appear on hover.
fig.update_traces(
    textinfo='value',
    textfont_size=15,
    hoverinfo='label+percent',
    marker={'colors': colors, 'line': {'color': '#000000', 'width': 1}},
)
fig.update_layout(
    title_text='Goals Scored by Tournament',
    title_x=0.45,
    uniformtext_minsize=12,
    uniformtext_mode='hide',
)
fig.show()
# Histogram of goals per tournament, one coloured bar per tournament.
color = px.colors.qualitative.Set1
fig = px.histogram(
    df,
    x='Tournament',
    y=df.Goles.sort_values(),
    color='Tournament',
    color_discrete_sequence=color,
    barmode='relative',
    text_auto=True,
    title='Goals scored by Tournament',
    width=1020,
    height=800,
)
# Order the categories on the x axis by their total, largest first.
fig.update_xaxes(categoryorder="total descending")
fig.show()
# Work on a copy so the helper columns added below stay off `df`.
df2 = df.copy()

# Keep only the digits of the 'Minute' text and cast the result to integers.
# NOTE(review): stripping every non-digit joins the digits on both sides of a
# separator, so a value such as "45+1" would become 451 and end up in the
# ">90" bucket below -- confirm against the data that this is intended.
df2['MinuteNum'] = (
    df2['Minute'].str.replace('[^0-9]', '', regex=True).astype('int64')
)

# Collapse every value above 90 into a single bucket labelled 93.
df2['MinuteNum2'] = df2['MinuteNum'].mask(df2['MinuteNum'] > 90, 93)

# Number of rows (non-null 'Tournament' entries) per minute bucket.
ordernumb = (
    df2.groupby(['MinuteNum2'])['Tournament']
    .count()
    .rename_axis('Minute')
    .reset_index(name='Goals Scored')
)
ordernumb2 = ordernumb.copy()
ordernumb2
| | Minute | Goals Scored |
|---|---|---|
| 0 | 2 | 1 |
| 1 | 3 | 4 |
| 2 | 4 | 4 |
| 3 | 5 | 8 |
| 4 | 6 | 2 |
| ... | ... | ... |
| 85 | 87 | 13 |
| 86 | 88 | 9 |
| 87 | 89 | 9 |
| 88 | 90 | 12 |
| 89 | 93 | 33 |
90 rows × 2 columns
# Bar chart: number of goals per match minute, coloured and labelled by count.
fig = px.bar(
    ordernumb2,
    x='Minute',
    y='Goals Scored',
    color='Goals Scored',
    text='Goals Scored',
    title='Minute of the game when Messi scored',
    height=600,
    width=1020,
)
# The previous template '%{text:.1.5s}' is not a valid d3-format specifier.
# The labels are whole-number counts, so print them without a format.
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.show()
The data shows that, over his career, Messi has scored most often in the 55th minute (14 goals), followed by the 78th and 87th minutes with 13 goals each. All goals scored during extra time have been grouped into a single bar (shown as minute 93), for a total of 33.
# Box plot: distribution of the (bucketed) scoring minute for each tournament.
color = px.colors.qualitative.Set1
fig = px.box(
    df2,
    x="Tournament",
    y='MinuteNum2',
    color="Tournament",
    color_discrete_sequence=color,
    boxmode="overlay",
    width=1020,
    height=600,
)
fig.show()
The box plot above shows the median and quartiles of the minute at which Messi scored his goals, broken down by tournament.
# Top 25 opponents by number of goals, as a two-column frame (Opponent, Total).
rival = (
    df2.groupby(['Opponent'])['Goles']
    .count()
    .sort_values(ascending=False)
    .head(25)
    .to_frame('Total')
    .reset_index()
)
rival
| | Opponent | Total |
|---|---|---|
| 0 | Sevilla FC | 38 |
| 1 | Atlético de Madrid | 32 |
| 2 | Valencia CF | 31 |
| 3 | Athletic Bilbao | 29 |
| 4 | Real Betis Balompié | 26 |
| 5 | Real Madrid | 26 |
| 6 | CA Osasuna | 25 |
| 7 | RCD Espanyol Barcelona | 25 |
| 8 | Levante UD | 24 |
| 9 | Getafe CF | 21 |
| 10 | Deportivo de La Coruña | 20 |
| 11 | SD Eibar | 20 |
| 12 | Rayo Vallecano | 18 |
| 13 | Real Sociedad | 18 |
| 14 | RCD Mallorca | 16 |
| 15 | Villarreal CF | 16 |
| 16 | Granada CF | 15 |
| 17 | Real Zaragoza | 14 |
| 18 | Celta de Vigo | 14 |
| 19 | Málaga CF | 13 |
| 20 | UD Almería | 13 |
| 21 | Deportivo Alavés | 13 |
| 22 | Racing Santander | 12 |
| 23 | CD Leganés | 11 |
| 24 | Arsenal FC | 9 |
# Bar chart: goals scored against each of the top 25 opponents.
fig = px.bar(
    rival,
    x='Opponent',
    y='Total',
    color='Opponent',
    # 'Opponent' is categorical, so the qualitative Set2 palette has to be
    # passed as a discrete sequence; a continuous colour scale is not used
    # for categorical colours.
    color_discrete_sequence=px.colors.qualitative.Set2,
    text='Total',
    title='Which team has received most of Messis Goals (Top 25)',
    height=800,
    width=1020,
)
# The previous template '%{text:1.3.s}' is not a valid d3-format specifier.
# The labels are whole-number counts, so print them without a format.
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.update_xaxes(tickangle=45)
fig.show()
Sevilla FC, Atlético de Madrid and Valencia CF were the teams most affected by Messi, conceding 38, 32 and 31 goals respectively to the Argentinian striker.
# Derive month and year columns from the goal date.
# pd.to_datetime is vectorised and leaves an already-datetime column as it is,
# so a single call replaces the earlier row-by-row pd.Timestamp conversions.
df2['Date'] = pd.to_datetime(df2['Date'])
df2['Date_M'] = df2['Date'].dt.to_period('M')
df2['Date_Y'] = df2['Date'].dt.year

# Goals per calendar year, as a two-column frame (Date_Y, Total).
years = df2.groupby(['Date_Y'])['Goles'].count().to_frame('Total').reset_index()
# Line chart: total goals per calendar year, each point labelled with its count.
fig = px.line(
    years,
    x="Date_Y",
    y='Total',
    text="Total",
    title='All Goals scored by Messi during his Career (Total)',
    width=1100,
    height=700,
)
fig.update_traces(textposition="bottom right")
# Linear tick mode on the x axis, anchored at tick0 = 1.
fig.update_layout(xaxis={'tickmode': 'linear', 'tick0': 1})
fig.show()
Between 2003 and 2022, 2012 was Leo Messi's best year: he scored a total of 79 goals for Barcelona alone, not counting his performances for the Argentinian national team.
# Goals per (year, tournament) pair, flattened to columns Date_Y, Tournament, Total.
years2 = (
    df2.groupby(['Date_Y', 'Tournament'])['Goles']
    .count()
    .reset_index(name='Total')
)
years2
| | Date_Y | Tournament | Total |
|---|---|---|---|
| 0 | 2004 | 2ª B - Grupo III | 5 |
| 1 | 2005 | 2ª B - Grupo III | 1 |
| 2 | 2005 | Champions League | 1 |
| 3 | 2005 | LaLiga | 2 |
| 4 | 2006 | Champions League | 1 |
| ... | ... | ... | ... |
| 64 | 2021 | LaLiga | 23 |
| 65 | 2021 | Ligue 1 | 1 |
| 66 | 2022 | Champions League | 4 |
| 67 | 2022 | Ligue 1 | 12 |
| 68 | 2022 | Trophée des Champions | 1 |
69 rows × 3 columns
# Line chart: goals per calendar year, one marked line per tournament.
fig = px.line(
    years2,
    x="Date_Y",
    y='Total',
    color='Tournament',
    text="Total",
    markers=True,
    title='Goals scored by Messi during his Career (By Tournament)',
    width=1100,
    height=700,
)
fig.update_traces(textposition="bottom right")
# Linear tick mode on the x axis, anchored at tick0 = 1.
fig.update_layout(xaxis={'tickmode': 'linear', 'tick0': 1})
fig.show()